#file="DRC_CASE_CONTROL_SERO.MALcorrected"
#file=/Volumes/TgenGwas/Plink/tgen-b1-qc
file=SchistoCandGenes.1kg.h3a
#pop=ALL
flank=10000
maxHap=20
#rm Genes.bed
for pop in ALL GWD MSL YRI ESN LWK
do
    rm All.HapsStats.txt
    rm Summary.HapStats.txt
    suffix=1kg.h3a.$pop
    for gene in IFNG	IL10	IL13	IL4	IL5	STAT6	CTLA4	FCN2	COLEC11	ABO	IL17A	IL17B	CRP	IL6R	IL17F	IL9	CD14	CXCL14	IL3	IL12B	VEGFA	CTGF	IL22RA2	NOS3	SHH
   #for gene in  HLAG IFNG IL4 IL6
   #for gene in IL4
    do
	echo "$gene "
	#Get a list of samples to include for population
	#grep $pop $file.fam > $pop.keep

	#Get chr, start and end position of gene from ../mart export file and add flanks at each end
	chr=`grep -w $gene ../genes.ensGRCh37.txt | grep -v HSC | grep -v PATCH | awk -v gen=$gene '$5 == gen {print $1}'`;
	start=`grep -w $gene ../genes.ensGRCh37.txt | grep -v HSC | grep -v PATCH | awk -v gen=$gene '$5 == gen {print $2}'`;
	end=`grep -w $gene ../genes.ensGRCh37.txt | grep -v HSC | grep -v PATCH | awk -v gen=$gene '$5 == gen {print $3}'`;
	end="$(($end+$flank))"
	start="$(($start-$flank))"
	echo "$chr	$start	$end	$gene" >> Genes.bed

	#Make plink files for gene and population. Change $pop.keep to $file.fam to just analyse all  populations together
	#echo "plink --bfile $file --keep $file.fam --chr $chr --from-bp $start --to-bp $end --recode --out $gene > Plink.$gene.$pop.log"
	plink --bfile $file --keep $pop.keep --chr $chr --from-bp $start --to-bp $end --recode --out $gene.$suffix > Plink.$gene.$suffix.$pop.log
	plink --file $gene.$suffix  --freq --out $gene.$suffix >> Plink.$gene.$suffix.$pop.log
	
	#Make geneinfo file with start end end position for each gene. Geneinfo not used by BigLD but it is used by Gpart so could be useful for larger projects 
	max=$(awk -v max=0 '{if($4>max){max=$4}}END{print max} ' $gene.map)
	min=$(awk -v min=1000000000 '{if($4<min){min=$4}}END{print min} ' $gene.map)
	awk -v ma=$max -v mi=$min -v ge=$gene 'BEGIN {print "genename chrN start.bp end.bp"}{print ge,$1,mi,ma}' $gene.map >> $gene.geneinfo
	
	#Run rscript to get haplotype blocks start and end positions
	Rscript runBigLD.R $gene.$suffix $maxHap > $gene.log 2>&1 

	#Run Perl script to extract haplotype sequences
	perl makeHaplotypes.pl $gene.$suffix $pop
	rm $gene.$suffix.ped
	rm $gene.$suffix.map
	rm $gene.$suffix.geneinfo
	rm $gene.$suffix.bigLD.txt
	rm $gene.$suffix.log
	rm Plink.$gene.$suffix.$pop.log
    done
    mv All.HapsStats.txt  All.HapsStats.$suffix.txt
    mv Summary.HapStats.txt  Summary.HapStats.$suffix.txt
done

